package com.example.demo.utils;

import com.example.demo.pojo.Content;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Component;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

/**
 * Fetches JD.com search result pages and converts the listed goods into
 * {@link Content} objects.
 *
 * @author lee
 * @date 2021-06-14 18:40
 */
@Component
public class HtmlParseUtil {

    /** Search endpoint; the URL-encoded keyword is appended directly after it. */
    private static final String SEARCH_URL = "http://search.jd.com/search?keyword=";

    /** Timeout, in milliseconds, handed to jsoup when fetching a page. */
    private static final int TIMEOUT_MILLIS = 30000;

    /**
     * Step used to derive the {@code s} query parameter from the page number.
     * NOTE(review): presumably the number of results per page - confirm.
     */
    private static final int RESULTS_PER_PAGE = 30;

    /** Prefix put in front of the image and link attribute values read from the page. */
    private static final String SCHEME_PREFIX = "https:";

    /** Id of the element whose {@code li} descendants are turned into results. */
    private static final String GOODS_LIST_ID = "J_goodsList";

    /**
     * Manual smoke test: searches for "java" and prints every parsed item.
     *
     * @param args ignored
     * @throws IOException declared for compatibility; failures are caught and printed
     */
    public static void main(String[] args) throws IOException {
        try {
            List<Content> list = new HtmlParseUtil().parseJd("java");
            for (Content content : list) {
                System.out.println(content.toString());
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Searches for {@code keyword} and parses the returned page.
     *
     * @param keyword search term; URL-encoded as UTF-8 before use, {@code null} is
     *                treated as an empty keyword
     * @return the parsed items; empty when the page has no goods-list element
     * @throws Exception if the URL is malformed or the page cannot be fetched
     */
    public List<Content> parseJd(String keyword) throws Exception {
        return fetchGoods(SEARCH_URL + encode(keyword));
    }

    /**
     * Searches for {@code keyword} and parses the requested result page.
     *
     * <p>The request carries {@code page=pageNo} and
     * {@code s=(pageNo - 1) * 30 + 1}. The offset is computed numerically before
     * being appended, so page 1 yields {@code s=1} and page 2 yields {@code s=31}.
     *
     * @param keyword  search term; URL-encoded as UTF-8 before use, {@code null} is
     *                 treated as an empty keyword
     * @param pageNo   1-based page number; {@code null} or values below 1 are
     *                 treated as page 1
     * @param pageSize currently ignored; kept so existing callers keep compiling
     * @return the parsed items; empty when the page has no goods-list element
     * @throws Exception if the URL is malformed or the page cannot be fetched
     */
    public List<Content> parseJdByPage(String keyword, Integer pageNo, Integer pageSize) throws Exception {
        int page = (pageNo == null || pageNo < 1) ? 1 : pageNo;
        // long arithmetic so a very large page number cannot overflow the offset.
        long start = (page - 1L) * RESULTS_PER_PAGE + 1;
        String url = SEARCH_URL + encode(keyword) + "&page=" + page + "&s=" + start;
        return fetchGoods(url);
    }

    /** URL-encodes the keyword as UTF-8 so characters such as {@code &} or {@code #} stay inside the query value. */
    private static String encode(String keyword) throws IOException {
        String value = (keyword == null) ? "" : keyword;
        return URLEncoder.encode(value, StandardCharsets.UTF_8.name());
    }

    /** Downloads {@code url} and converts every {@code li} under the goods list into a {@link Content}. */
    private List<Content> fetchGoods(String url) throws IOException {
        Document document = Jsoup.parse(new URL(url), TIMEOUT_MILLIS);
        List<Content> contents = new ArrayList<>();

        Element goodsList = document.getElementById(GOODS_LIST_ID);
        if (goodsList == null) {
            // The container is absent from this page; report "no results" instead of failing.
            return contents;
        }

        for (Element item : goodsList.getElementsByTag("li")) {
            contents.add(toContent(item));
        }
        return contents;
    }

    /** Reads image, price, title and link from one list item. */
    private static Content toContent(Element item) {
        Elements name = item.getElementsByClass("p-name").eq(0);
        String img = SCHEME_PREFIX + item.getElementsByTag("img").eq(0).attr("data-lazy-img");
        String price = item.getElementsByClass("p-price").eq(0).text();
        String title = name.text();
        String href = SCHEME_PREFIX + name.select("a").attr("href");
        return new Content(img, price, title, href);
    }

}
